Loading the Tidy Data

Load the tidy data from disk if it is available. If it is not available, run notebook-0-tidy.Rmd to create it.

df <- readRDS('../data/processed/tidy.Rds')
str(df)
## 'data.frame':    1537 obs. of  29 variables:
##  $ owner_id                  : chr  "0031b2aaeba16a973a7126a3919fedd6" "0031b2aaeba16a973a7126a3919fedd6" "0031b2aaeba16a973a7126a3919fedd6" "0031b2aaeba16a973a7126a3919fedd6" ...
##  $ dog_name                  : chr  "basso" "buddy" "cassandra" "chief" ...
##  $ time_thinking             : Factor w/ 4 levels "lt_one_wk","one_wk_to_six_mo",..: 2 2 2 2 2 3 3 2 2 2 ...
##  $ primary_motivation        : Factor w/ 8 levels "for_other_pet",..: 2 2 8 6 1 2 2 1 1 2 ...
##  $ acquisition_source        : Factor w/ 6 levels "foreign","breeder",..: 2 2 1 2 6 2 2 6 3 6 ...
##  $ met_expectations          : Ord.factor w/ 3 levels "no"<"partial"<..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ is_living_with_dog        : logi  FALSE FALSE TRUE TRUE TRUE TRUE ...
##  $ time_together_len         : Factor w/ 4 levels "lt_one_wk","one_wk_to_six_mo",..: 4 4 NA NA NA NA NA NA NA NA ...
##  $ curr_dog_location         : Factor w/ 6 levels "euthanized","died",..: 2 2 NA NA NA NA NA NA NA NA ...
##  $ is_consider_another_dog   : logi  TRUE NA NA NA NA NA ...
##  $ revised_acquisition_source: Factor w/ 6 levels "foreign","breeder",..: 2 NA NA NA NA NA 2 6 6 6 ...
##  $ owner_gender              : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 2 2 1 ...
##  $ owner_age                 : num  57 57 57 57 57 43 43 70 70 75 ...
##  $ age_rank                  : num  2 2 3 1 4 3 6 5 5 7 ...
##  $ appearance_rank           : num  6 4 1 3 3 4 5 7 4 6 ...
##  $ breed_rank                : num  1 1 2 2 7 1 1 6 2 1 ...
##  $ compatability_rank        : num  3 5 4 4 1 5 3 2 6 4 ...
##  $ personality_rank          : num  5 6 5 5 6 6 4 1 1 2 ...
##  $ size_rank                 : num  7 7 6 6 5 7 7 3 7 5 ...
##  $ trainability_rank         : num  7 7 7 7 7 2 2 4 3 3 ...
##  $ revised_age_rank          : num  2 NA NA NA NA NA 3 6 7 5 ...
##  $ revised_appearance_rank   : num  4 NA NA NA NA NA 7 5 1 6 ...
##  $ revised_breed_rank        : num  1 NA NA NA NA NA 4 4 2 1 ...
##  $ revised_compatability_rank: num  5 NA NA NA NA NA 1 3 5 2 ...
##  $ revised_personality_rank  : num  3 NA NA NA NA NA 6 1 6 4 ...
##  $ revised_size_rank         : num  6 NA NA NA NA NA 5 7 3 7 ...
##  $ revised_trainability_rank : num  7 NA NA NA NA NA 2 2 4 3 ...
##  $ is_satisfied              : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ is_owner_male             : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
stopifnot(identical(dim(df)+0, c(1537, 29)))

A summary of the loaded data is provided below.

summary(df)
##    owner_id           dog_name                     time_thinking
##  Length:1537        Length:1537        lt_one_wk          :196  
##  Class :character   Class :character   one_wk_to_six_mo   :752  
##  Mode  :character   Mode  :character   gt_six_mo_to_six_yr:528  
##                                        gt_six_yr          : 61  
##                                                                 
##                                                                 
##                                                                 
##      primary_motivation        acquisition_source met_expectations
##  companionship:846      foreign         : 12      no     :   7    
##  for_other_pet:224      breeder         :480      partial: 248    
##  other        :187      family_or_friend:130      yes    :1282    
##  working      :100      found           : 51                      
##  exercise     : 77      pet_shop        : 68                      
##  family       : 70      rescue          :796                      
##  (Other)      : 33                                                
##  is_living_with_dog           time_together_len   curr_dog_location
##  Mode :logical      lt_one_wk          :   1    euthanized :  34   
##  FALSE:190          one_wk_to_six_mo   :   4    died       : 142   
##  TRUE :1347         gt_six_mo_to_six_yr:  36    lost       :   1   
##                     gt_six_yr          : 149    rehomed    :   5   
##                     NA's               :1347    surrendered:   1   
##                                                 other      :   7   
##                                                 NA's       :1347   
##  is_consider_another_dog    revised_acquisition_source owner_gender 
##  Mode :logical           foreign         :  4          female:1433  
##  FALSE:61                breeder         :226          male  : 104  
##  TRUE :876               family_or_friend: 10                       
##  NA's :600               pet_shop        :  8                       
##                          found           :  6                       
##                          rescue          :622                       
##                          NA's            :661                       
##    owner_age        age_rank     appearance_rank   breed_rank   
##  Min.   :18.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:42.00   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:1.000  
##  Median :54.00   Median :4.000   Median :5.000   Median :4.000  
##  Mean   :52.31   Mean   :4.314   Mean   :4.541   Mean   :3.632  
##  3rd Qu.:64.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :85.00   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##  compatability_rank personality_rank   size_rank     trainability_rank
##  Min.   :1.000      Min.   :1.0      Min.   :1.000   Min.   :1.000    
##  1st Qu.:2.000      1st Qu.:1.0      1st Qu.:3.000   1st Qu.:3.000    
##  Median :5.000      Median :2.0      Median :4.000   Median :5.000    
##  Mean   :4.492      Mean   :2.5      Mean   :4.281   Mean   :4.651    
##  3rd Qu.:7.000      3rd Qu.:3.0      3rd Qu.:6.000   3rd Qu.:6.000    
##  Max.   :7.000      Max.   :7.0      Max.   :7.000   Max.   :7.000    
##                                                                       
##  revised_age_rank revised_appearance_rank revised_breed_rank
##  Min.   :1.000    Min.   :1.000           Min.   :1.000     
##  1st Qu.:3.000    1st Qu.:4.000           1st Qu.:3.000     
##  Median :5.000    Median :6.000           Median :5.000     
##  Mean   :4.595    Mean   :5.467           Mean   :4.377     
##  3rd Qu.:6.000    3rd Qu.:7.000           3rd Qu.:6.000     
##  Max.   :7.000    Max.   :7.000           Max.   :7.000     
##  NA's   :661      NA's   :661             NA's   :661       
##  revised_compatability_rank revised_personality_rank revised_size_rank
##  Min.   :1.000              Min.   :1.00             Min.   :1.000    
##  1st Qu.:1.000              1st Qu.:1.00             1st Qu.:3.000    
##  Median :3.000              Median :2.00             Median :4.000    
##  Mean   :3.334              Mean   :2.16             Mean   :4.301    
##  3rd Qu.:5.000              3rd Qu.:3.00             3rd Qu.:6.000    
##  Max.   :7.000              Max.   :7.00             Max.   :7.000    
##  NA's   :661                NA's   :661              NA's   :661      
##  revised_trainability_rank is_satisfied    is_owner_male  
##  Min.   :1.000             Mode :logical   Mode :logical  
##  1st Qu.:3.000             FALSE:255       FALSE:1433     
##  Median :4.000             TRUE :1282      TRUE :104      
##  Mean   :4.009                                            
##  3rd Qu.:5.000                                            
##  Max.   :7.000                                            
##  NA's   :661

Descriptive Stats

Owner Demographics

The number of participating owners is equal to the number of unique owner identifiers.

length(unique(df$owner_id))
## [1] 933

Generate a breakdown of owner gender.

df %>%
  dplyr::distinct(owner_id, .keep_all=TRUE) %>%
  dplyr::count(owner_gender) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  owner_gender   n   freq
##        female 859  92.07
##          male  74   7.93
##         Total 933 100.00

Generate summary statistics for owner ages.

summary(
  df %>%
    dplyr::distinct(owner_id, .keep_all=TRUE) %>%
    dplyr::select(owner_age)
)
##    owner_age    
##  Min.   :18.00  
##  1st Qu.:42.00  
##  Median :53.00  
##  Mean   :51.53  
##  3rd Qu.:63.00  
##  Max.   :85.00

Examine the distribution of owner ages.

df %>%
  ggplot(aes(x=owner_age)) +
    geom_histogram(alpha=0.5, position="identity", aes(y = ..density..),
                   color="black", bins=30) +
    geom_density() +
    geom_vline(xintercept=mean(df$owner_age), color="black", linetype="dashed",
               size=1)

Examine the distribution of ages split by owner gender.

df %>%
  dplyr::select(is_owner_male, owner_age) %>%
  dplyr::transmute(male_age=ifelse(is_owner_male==TRUE, owner_age, NA),
                   female_age=ifelse(is_owner_male==FALSE, owner_age, NA)) %>%
  summary()
##     male_age       female_age   
##  Min.   :20.00   Min.   :18.00  
##  1st Qu.:52.75   1st Qu.:42.00  
##  Median :62.00   Median :54.00  
##  Mean   :57.88   Mean   :51.91  
##  3rd Qu.:67.25   3rd Qu.:63.00  
##  Max.   :75.00   Max.   :85.00  
##  NA's   :1433    NA's   :104
means <- df %>%
  dplyr::group_by(owner_gender) %>%
  dplyr::summarise(means = mean(owner_age))
df %>%
  ggplot(aes(owner_age, fill = owner_gender)) +
    geom_histogram(alpha=0.7, position="identity", aes(y = ..density..),
                   color="black", bins=30) +
    geom_density(alpha=0.7) +
    geom_vline(xintercept=means$means, color=c("#F8766D", "#00BFC4"),
               linetype="dashed", size=0.75)

Dog Demographics

The number of dogs corresponds to the number of rows in the data frame.

dim(df)[1]
## [1] 1537

Generate summary statistics for the number of dogs per household.

summary(as.data.frame(table(df$owner_id))$Freq)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.647   2.000  10.000

Determine the number of single dog households.

df %>%
  dplyr::count(owner_id) %>%
  dplyr::mutate(freq = n / sum(n)) %>%
  dplyr::filter(n == 1) %>%
  dplyr::mutate(freq = round(sum(freq) * 100, 2)) %>%
  dplyr::count(freq)
##    freq   n
## 1 32.21 495

Generate breakdown of living situation.

df %>%
  dplyr::count(is_living_with_dog) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  is_living_with_dog    n   freq
##               FALSE  190  12.36
##                TRUE 1347  87.64
##               Total 1537 100.00

Dogs Not-Living-With-Owner Demographics

df %>%
  dplyr::filter(is_living_with_dog == FALSE) %>%
  dplyr::count(time_together_len) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##    time_together_len   n   freq
##            lt_one_wk   1   0.53
##     one_wk_to_six_mo   4   2.11
##  gt_six_mo_to_six_yr  36  18.95
##            gt_six_yr 149  78.42
##                Total 190 100.01
df %>%
  dplyr::filter(is_living_with_dog == FALSE) %>%
  dplyr::count(curr_dog_location) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  curr_dog_location   n   freq
##         euthanized  34  17.89
##               died 142  74.74
##               lost   1   0.53
##            rehomed   5   2.63
##        surrendered   1   0.53
##              other   7   3.68
##              Total 190 100.00

Expectations and Considerations

Generate breakdown for meeting of expectations.

df %>%
  dplyr::count(met_expectations) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  met_expectations    n   freq
##                no    7   0.46
##           partial  248  16.14
##               yes 1282  83.41
##             Total 1537 100.01
df %>%
  dplyr::group_by(is_satisfied) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  is_satisfied    n   freq
##         FALSE  255  16.59
##          TRUE 1282  83.41
##         Total 1537 100.00

Get count of owners with at least one dog that met expectations.

df %>%
  dplyr::group_by(owner_id) %>%
  dplyr::summarise(sat_cnt = sum(is_satisfied)) %>%
  dplyr::filter(sat_cnt > 0)
## # A tibble: 843 × 2
##    owner_id                         sat_cnt
##    <chr>                              <int>
##  1 0031b2aaeba16a973a7126a3919fedd6       5
##  2 00f2854a7b7bcd35db54310d3d9ea142       2
##  3 0143addbe877065bb8d940e6e8901700       2
##  4 018b0b08b0a8dbc63f58c47b0c94d2e4       2
##  5 01a4ef3f5851f6796465f7678bcc8612       2
##  6 01b1aee4b9aade64e464d6fc40dc679b       1
##  7 01d2a80069fbbc5d0151d4b108cbcfed       1
##  8 01f155269919ba42302c9ce859842872       2
##  9 02093f7a704d1d81a701e841681c0cac       1
## 10 02096176f012a8064b0187ce4a946f7b       1
## # … with 833 more rows

Get a count of owners where all dogs met expectations.

df %>%
  dplyr::group_by(owner_id) %>%
  dplyr::summarise(sat_cnt = sum(is_satisfied), tot_cnt=n()) %>%
  dplyr::filter(sat_cnt == tot_cnt)
## # A tibble: 707 × 3
##    owner_id                         sat_cnt tot_cnt
##    <chr>                              <int>   <int>
##  1 0031b2aaeba16a973a7126a3919fedd6       5       5
##  2 00f2854a7b7bcd35db54310d3d9ea142       2       2
##  3 0143addbe877065bb8d940e6e8901700       2       2
##  4 018b0b08b0a8dbc63f58c47b0c94d2e4       2       2
##  5 01a4ef3f5851f6796465f7678bcc8612       2       2
##  6 01b1aee4b9aade64e464d6fc40dc679b       1       1
##  7 01d2a80069fbbc5d0151d4b108cbcfed       1       1
##  8 01f155269919ba42302c9ce859842872       2       2
##  9 02093f7a704d1d81a701e841681c0cac       1       1
## 10 025fdb64fcd4cf4a3a8b5a750b7abc07       2       2
## # … with 697 more rows

Generate breakdown of acquisition sources.

df %>%
  dplyr::count(acquisition_source) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  acquisition_source    n   freq
##             foreign   12   0.78
##             breeder  480  31.23
##    family_or_friend  130   8.46
##               found   51   3.32
##            pet_shop   68   4.42
##              rescue  796  51.79
##               Total 1537 100.00

Generate breakdown for time spent thinking.

df %>%
  dplyr::count(time_thinking) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##        time_thinking    n   freq
##            lt_one_wk  196  12.75
##     one_wk_to_six_mo  752  48.93
##  gt_six_mo_to_six_yr  528  34.35
##            gt_six_yr   61   3.97
##                Total 1537 100.00

Generate breakdown for primary motivation.

df %>%
  dplyr::count(primary_motivation) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  primary_motivation    n   freq
##       for_other_pet  224  14.57
##       companionship  846  55.04
##            exercise   77   5.01
##          protection   12   0.78
##              social   21   1.37
##              family   70   4.55
##             working  100   6.51
##               other  187  12.17
##               Total 1537 100.00

Generate a summary for characteristic ranks.

df %>%
  dplyr::select(contains("rank") & !contains("revised")) %>%
  summary()
##     age_rank     appearance_rank   breed_rank    compatability_rank
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000     
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:1.000   1st Qu.:2.000     
##  Median :4.000   Median :5.000   Median :4.000   Median :5.000     
##  Mean   :4.314   Mean   :4.541   Mean   :3.632   Mean   :4.492     
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000     
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000     
##  personality_rank   size_rank     trainability_rank
##  Min.   :1.0      Min.   :1.000   Min.   :1.000    
##  1st Qu.:1.0      1st Qu.:3.000   1st Qu.:3.000    
##  Median :2.0      Median :4.000   Median :5.000    
##  Mean   :2.5      Mean   :4.281   Mean   :4.651    
##  3rd Qu.:3.0      3rd Qu.:6.000   3rd Qu.:6.000    
##  Max.   :7.0      Max.   :7.000   Max.   :7.000

Consideration of Another Dog

Breakdown of owners that would consider getting another dog.

df %>%
  dplyr::group_by(owner_id) %>%
  tidyr::fill(is_consider_another_dog, .direction="downup") %>%
  dplyr::ungroup() %>%
  dplyr::distinct(owner_id, .keep_all=TRUE) %>%
  dplyr::count(is_consider_another_dog) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  is_consider_another_dog   n   freq
##                    FALSE  61   6.54
##                     TRUE 851  91.21
##                     <NA>  21   2.25
##                    Total 933 100.00

Breakdown of fully satisfied owners considering another dog.

df %>%
  dplyr::group_by(owner_id) %>%
  dplyr::mutate(sat_cnt = sum(is_satisfied), tot_cnt=n()) %>%
  dplyr::filter(sat_cnt == tot_cnt) %>%
  tidyr::fill(is_consider_another_dog, .direction="downup") %>%
  dplyr::ungroup() %>%
  dplyr::distinct(owner_id, .keep_all=TRUE) %>%
  dplyr::count(is_consider_another_dog) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  is_consider_another_dog   n   freq
##                    FALSE  44   6.22
##                     TRUE 644  91.09
##                     <NA>  19   2.69
##                    Total 707 100.00

Breakdown of owners satisfied with some, but not all, dogs considering another dog.

df %>%
  dplyr::group_by(owner_id) %>%
  dplyr::mutate(sat_cnt = sum(is_satisfied), tot_cnt=n()) %>%
  dplyr::filter(sat_cnt < tot_cnt, sat_cnt > 0) %>%
  tidyr::fill(is_consider_another_dog, .direction="downup") %>%
  dplyr::ungroup() %>%
  dplyr::distinct(owner_id, .keep_all=TRUE) %>%
  dplyr::count(is_consider_another_dog) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  is_consider_another_dog   n   freq
##                    FALSE   4   2.94
##                     TRUE 131  96.32
##                     <NA>   1   0.74
##                    Total 136 100.00

Breakdown of owners not satisfied with any dogs considering another dog.

df %>%
  dplyr::group_by(owner_id) %>%
  dplyr::mutate(sat_cnt = sum(is_satisfied), tot_cnt=n()) %>%
  dplyr::filter(sat_cnt == 0) %>%
  tidyr::fill(is_consider_another_dog, .direction="downup") %>%
  dplyr::ungroup() %>%
  dplyr::distinct(owner_id, .keep_all=TRUE) %>%
  dplyr::count(is_consider_another_dog) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  is_consider_another_dog  n  freq
##                    FALSE 13 14.44
##                     TRUE 76 84.44
##                     <NA>  1  1.11
##                    Total 90 99.99

Breakdown of revised acquisition source.

df %>%
  dplyr::group_by(revised_acquisition_source) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::mutate(freq = round(n / sum(n) * 100, 2)) %>%
  janitor::adorn_totals("row")
##  revised_acquisition_source    n   freq
##                     foreign    4   0.26
##                     breeder  226  14.70
##            family_or_friend   10   0.65
##                    pet_shop    8   0.52
##                       found    6   0.39
##                      rescue  622  40.47
##                        <NA>  661  43.01
##                       Total 1537 100.00

Generate a summary for revised characteristic ranks.

df %>%
  dplyr::select(contains("rank") & contains("revised")) %>%
  summary()
##  revised_age_rank revised_appearance_rank revised_breed_rank
##  Min.   :1.000    Min.   :1.000           Min.   :1.000     
##  1st Qu.:3.000    1st Qu.:4.000           1st Qu.:3.000     
##  Median :5.000    Median :6.000           Median :5.000     
##  Mean   :4.595    Mean   :5.467           Mean   :4.377     
##  3rd Qu.:6.000    3rd Qu.:7.000           3rd Qu.:6.000     
##  Max.   :7.000    Max.   :7.000           Max.   :7.000     
##  NA's   :661      NA's   :661             NA's   :661       
##  revised_compatability_rank revised_personality_rank revised_size_rank
##  Min.   :1.000              Min.   :1.00             Min.   :1.000    
##  1st Qu.:1.000              1st Qu.:1.00             1st Qu.:3.000    
##  Median :3.000              Median :2.00             Median :4.000    
##  Mean   :3.334              Mean   :2.16             Mean   :4.301    
##  3rd Qu.:5.000              3rd Qu.:3.00             3rd Qu.:6.000    
##  Max.   :7.000              Max.   :7.00             Max.   :7.000    
##  NA's   :661                NA's   :661              NA's   :661      
##  revised_trainability_rank
##  Min.   :1.000            
##  1st Qu.:3.000            
##  Median :4.000            
##  Mean   :4.009            
##  3rd Qu.:5.000            
##  Max.   :7.000            
##  NA's   :661

Difference in rank between revised rank and average of prior ranks.

df %>%
  dplyr::select(contains("rank")) %>%
  dplyr::summarise(
    dplyr::across(
      everything(),~ janitor::round_half_up(mean(., na.rm = TRUE), 2))) %>%
  dplyr::mutate(
    age_delta = revised_age_rank - age_rank,
    app_delta = revised_appearance_rank - appearance_rank,
    breed_delta = revised_breed_rank - breed_rank,
    comp_delta = revised_compatability_rank - compatability_rank,
    pers_delta = revised_personality_rank - personality_rank,
    size_delta = revised_size_rank - size_rank,
    train_delta = revised_trainability_rank - trainability_rank) %>%
  summary()
##     age_rank    appearance_rank   breed_rank   compatability_rank
##  Min.   :4.31   Min.   :4.54    Min.   :3.63   Min.   :4.49      
##  1st Qu.:4.31   1st Qu.:4.54    1st Qu.:3.63   1st Qu.:4.49      
##  Median :4.31   Median :4.54    Median :3.63   Median :4.49      
##  Mean   :4.31   Mean   :4.54    Mean   :3.63   Mean   :4.49      
##  3rd Qu.:4.31   3rd Qu.:4.54    3rd Qu.:3.63   3rd Qu.:4.49      
##  Max.   :4.31   Max.   :4.54    Max.   :3.63   Max.   :4.49      
##  personality_rank   size_rank    trainability_rank revised_age_rank
##  Min.   :2.5      Min.   :4.28   Min.   :4.65      Min.   :4.59    
##  1st Qu.:2.5      1st Qu.:4.28   1st Qu.:4.65      1st Qu.:4.59    
##  Median :2.5      Median :4.28   Median :4.65      Median :4.59    
##  Mean   :2.5      Mean   :4.28   Mean   :4.65      Mean   :4.59    
##  3rd Qu.:2.5      3rd Qu.:4.28   3rd Qu.:4.65      3rd Qu.:4.59    
##  Max.   :2.5      Max.   :4.28   Max.   :4.65      Max.   :4.59    
##  revised_appearance_rank revised_breed_rank revised_compatability_rank
##  Min.   :5.47            Min.   :4.38       Min.   :3.33              
##  1st Qu.:5.47            1st Qu.:4.38       1st Qu.:3.33              
##  Median :5.47            Median :4.38       Median :3.33              
##  Mean   :5.47            Mean   :4.38       Mean   :3.33              
##  3rd Qu.:5.47            3rd Qu.:4.38       3rd Qu.:3.33              
##  Max.   :5.47            Max.   :4.38       Max.   :3.33              
##  revised_personality_rank revised_size_rank revised_trainability_rank
##  Min.   :2.16             Min.   :4.3       Min.   :4.01             
##  1st Qu.:2.16             1st Qu.:4.3       1st Qu.:4.01             
##  Median :2.16             Median :4.3       Median :4.01             
##  Mean   :2.16             Mean   :4.3       Mean   :4.01             
##  3rd Qu.:2.16             3rd Qu.:4.3       3rd Qu.:4.01             
##  Max.   :2.16             Max.   :4.3       Max.   :4.01             
##    age_delta      app_delta     breed_delta     comp_delta      pers_delta   
##  Min.   :0.28   Min.   :0.93   Min.   :0.75   Min.   :-1.16   Min.   :-0.34  
##  1st Qu.:0.28   1st Qu.:0.93   1st Qu.:0.75   1st Qu.:-1.16   1st Qu.:-0.34  
##  Median :0.28   Median :0.93   Median :0.75   Median :-1.16   Median :-0.34  
##  Mean   :0.28   Mean   :0.93   Mean   :0.75   Mean   :-1.16   Mean   :-0.34  
##  3rd Qu.:0.28   3rd Qu.:0.93   3rd Qu.:0.75   3rd Qu.:-1.16   3rd Qu.:-0.34  
##  Max.   :0.28   Max.   :0.93   Max.   :0.75   Max.   :-1.16   Max.   :-0.34  
##    size_delta    train_delta   
##  Min.   :0.02   Min.   :-0.64  
##  1st Qu.:0.02   1st Qu.:-0.64  
##  Median :0.02   Median :-0.64  
##  Mean   :0.02   Mean   :-0.64  
##  3rd Qu.:0.02   3rd Qu.:-0.64  
##  Max.   :0.02   Max.   :-0.64

Data Exploration

Numeric Correlations

pairs.panels(df[,12:19])

Fields Split by Met Expecations

Owner Fields

Split by owner gender.

df %>%
  dplyr::count(owner_gender, is_satisfied) %>%
  tidyr::spread(is_satisfied, n) %>%
  janitor::adorn_totals("row") %>%
  janitor::adorn_totals("col")
##  owner_gender FALSE TRUE Total
##        female   239 1194  1433
##          male    16   88   104
##         Total   255 1282  1537
df %>%
  dplyr::group_by(owner_gender) %>%
  dplyr::mutate(freq = sum(is_satisfied) / n()) %>%
  ggplot(aes(x=reorder(owner_gender, desc(freq)), fill=is_satisfied)) +
    geom_bar(position="fill") +
    xlab("owner gender") +
    ylab("frequency")

Split by owner age.

means <- df %>%
  dplyr::group_by(is_satisfied) %>%
  dplyr::summarise(means = mean(owner_age))
df %>%
  ggplot(aes(owner_age, fill = is_satisfied)) +
    geom_histogram(alpha=0.7, position="identity", aes(y = ..density..),
                   color="black", bins=30) +
    geom_density(alpha=0.7) +
    geom_vline(xintercept=means$means, color=c("#F8766D", "#00BFC4"),
               linetype="dashed", size=0.75)

Dog Fields

Split by acquisition source.

df %>%
  dplyr::count(acquisition_source, is_satisfied) %>%
  tidyr::spread(is_satisfied, n) %>%
  janitor::adorn_totals("row") %>%
  janitor::adorn_totals("col")
##  acquisition_source FALSE TRUE Total
##             foreign     2   10    12
##             breeder    56  424   480
##    family_or_friend    27  103   130
##               found     7   44    51
##            pet_shop    11   57    68
##              rescue   152  644   796
##               Total   255 1282  1537
df %>%
  dplyr::group_by(acquisition_source) %>%
  dplyr::mutate(freq = sum(is_satisfied) / n()) %>%
  dplyr::mutate(
    acquisition_source = fct_recode(
      acquisition_source,
      "family/friend"="family_or_friend",
      "pet shop"="pet_shop",
      "shelter/rescue"="rescue"
    )
  ) %>%
  ggplot(aes(x=reorder(acquisition_source, desc(freq)), fill=is_satisfied)) +
    geom_bar(position="fill") +
    xlab("acquisition source") +
    ylab("frequency") +
    theme(axis.text.x = element_text(angle = 45, hjust=1))

Split by living situation.

df %>%
  dplyr::count(is_living_with_dog, is_satisfied) %>%
  tidyr::spread(is_satisfied, n) %>%
  janitor::adorn_totals("row") %>%
  janitor::adorn_totals("col")
##  is_living_with_dog FALSE TRUE Total
##               FALSE    32  158   190
##                TRUE   223 1124  1347
##               Total   255 1282  1537
df %>%
  dplyr::group_by(is_living_with_dog) %>%
  dplyr::mutate(freq = sum(is_satisfied) / n()) %>%
  ggplot(aes(x=reorder(is_living_with_dog, desc(freq)), fill=is_satisfied)) +
    geom_bar(position="fill") +
    xlab("living with dog") +
    ylab("frequency")

Considerations Fields

Split by time spent thinking.

df %>%
  dplyr::count(time_thinking, is_satisfied) %>%
  tidyr::spread(is_satisfied, n) %>%
  janitor::adorn_totals("row") %>%
  janitor::adorn_totals("col")
##        time_thinking FALSE TRUE Total
##            lt_one_wk    28  168   196
##     one_wk_to_six_mo   127  625   752
##  gt_six_mo_to_six_yr    84  444   528
##            gt_six_yr    16   45    61
##                Total   255 1282  1537
df %>%
  dplyr::group_by(time_thinking) %>%
  dplyr::mutate(freq = sum(is_satisfied) / n()) %>%
  ggplot(aes(x=reorder(time_thinking, desc(freq)), fill=is_satisfied)) +
    geom_bar(position="fill") +
    xlab("time spent thinking") +
    ylab("frequency")

Split by primary motivation.

df %>%
  dplyr::count(primary_motivation, is_satisfied) %>%
  tidyr::spread(is_satisfied, n) %>%
  janitor::adorn_totals("row") %>%
  janitor::adorn_totals("col")
##  primary_motivation FALSE TRUE Total
##       for_other_pet    43  181   224
##       companionship   102  744   846
##            exercise    19   58    77
##          protection     4    8    12
##              social     4   17    21
##              family    21   49    70
##             working    18   82   100
##               other    44  143   187
##               Total   255 1282  1537
df %>%
  dplyr::group_by(primary_motivation) %>%
  dplyr::mutate(freq = sum(is_satisfied) / n()) %>%
  ggplot(aes(x=reorder(primary_motivation, desc(freq)), fill=is_satisfied)) +
    geom_bar(position="fill") +
    xlab("primary motivation") +
    ylab("frequency") +
    theme(axis.text.x = element_text(angle = 45, hjust=1))

Split by consideration of another dog.

df %>%
  dplyr::filter(!is.na(is_consider_another_dog)) %>%
  dplyr::count(is_consider_another_dog, is_satisfied) %>%
  tidyr::spread(is_satisfied, n) %>%
  janitor::adorn_totals("row") %>%
  janitor::adorn_totals("col")
##  is_consider_another_dog FALSE TRUE Total
##                    FALSE    15   46    61
##                     TRUE   156  720   876
##                    Total   171  766   937
df %>%
  dplyr::filter(!is.na(is_consider_another_dog)) %>%
  dplyr::group_by(is_consider_another_dog) %>%
  dplyr::mutate(freq = sum(is_satisfied) / n()) %>%
  ggplot(aes(x=reorder(is_consider_another_dog, desc(freq)), fill=is_satisfied)) +
    geom_bar(position="fill") +
    xlab("would consider an additional dog") +
    ylab("frequency") +
    theme(axis.text.x = element_text(angle = 45, hjust=1))

Split by ranks.

featurePlot(
  x = df[, 13:19],
  y = factor(df$is_satisfied),
  plot = "box",
  scales = list(y = list(relation="free"), x = list(rot = 90)),
  layout = c(4,2),
  auto.key = list(columns = 2)
)

Split by revised ranks.

featurePlot(
  x = df[, 20:26],
  y = factor(df$is_satisfied),
  plot = "box",
  scales = list(y = list(relation="free"), x = list(rot = 90)),
  layout = c(4,2),
  auto.key = list(columns = 2)
)

Split by source.

featurePlot(
  x = df[, 13:19],
  y = factor(df$acquisition_source),
  plot = "box",
  scales = list(y = list(relation="free"), x = list(rot = 90)),
  layout = c(4,2),
  auto.key = list(columns = 2)
)

Contingency Tables

table(df$primary_motivation, df$time_thinking)
##                
##                 lt_one_wk one_wk_to_six_mo gt_six_mo_to_six_yr gt_six_yr
##   for_other_pet        22              145                  54         3
##   companionship        74              401                 326        45
##   exercise              2               37                  33         5
##   protection            0                7                   5         0
##   social                2                9                  10         0
##   family                9               38                  22         1
##   working               5               47                  47         1
##   other                82               68                  31         6
table(df$acquisition_source, df$time_thinking)
##                   
##                    lt_one_wk one_wk_to_six_mo gt_six_mo_to_six_yr gt_six_yr
##   foreign                  2                6                   4         0
##   breeder                 23              192                 234        31
##   family_or_friend        35               63                  25         7
##   found                   24               17                   9         1
##   pet_shop                 7               38                  20         3
##   rescue                 105              436                 236        19

Mosiac Plots

Plot of background variables to be used in model.

#mosaic(~ owner_gender + primary_motivation + is_satisfied, data=df,
#       highlighting = "is_satisfied", highlighting_fill = c("lightblue", "pink"),
#       direction = c("h", "v", "v"))

Violin Plots

df_long <- df %>%
  dplyr::select(owner_gender, is_satisfied, dplyr::contains("rank"), time_thinking) %>%
  rename_with(~ gsub("revised_(.*)", "\\1_revised", .)) %>%
  tidyr::pivot_longer(
    cols=age_rank:trainability_rank_revised,
    names_to=c("char", "revised"),
    names_pattern="(.*)_rank_?(revised)?",
    values_to="rank",
    values_drop_na=TRUE
  ) %>%
  dplyr::mutate(
    Adoption=factor(ifelse(revised =="revised", "future", "past"),
                    levels=c("past", "future"))) %>%
  dplyr::mutate(char=as.factor(char))

width <- 5
df_long %>%
  ggplot(aes(x=char, y=rank, fill=Adoption)) +
    introdataviz::geom_split_violin(alpha=.5, trim=T) +
    geom_boxplot(width=.125, alpha=.8, show.legend=FALSE) +
    scale_x_discrete(name = "Characteristic") +
    scale_y_continuous(name = "Rank",
                       limits = c(1, 7),
                       breaks = seq(1, 7, 1))

ggsave("fig-1-violin-plot.pdf")
## Saving 7 x 5 in image

Ridge Plots

df_long %>%
  dplyr::filter(Adoption=="past") %>%
  ggplot(aes(x=rank, y=char, fill = 0.5 - abs(0.5 - stat(ecdf)))) +
    stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE) +
    scale_fill_viridis_c(name = "Tail probability", direction = -1)
## Picking joint bandwidth of 0.386

df_long %>%
  dplyr::filter(Adoption=="future") %>%
  ggplot(aes(x=rank, y=char, fill = 0.5 - abs(0.5 - stat(ecdf)))) +
    stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE) +
    scale_fill_viridis_c(name = "Tail probability", direction = -1)
## Picking joint bandwidth of 0.398

df_long %>%
  ggplot(aes(x=rank, y=char, fill=Adoption,
             group=interaction(char, !ifelse(Adoption=="past", F, T)))) +
    ggridges::geom_density_ridges(alpha=0.5) +
  theme_ridges() +
  labs(
    x = "Rank",
    y = "Characteristic",
  )
## Picking joint bandwidth of 0.392

ggsave("fig-1-ridgeline-plot.pdf")
## Saving 7 x 5 in image
## Picking joint bandwidth of 0.392

Jitter Plots

df_long2 <- df_long %>%
  mutate(char2 = 2*as.numeric(as.factor(char))+0.8*as.numeric(as.factor(Adoption)))

df_long2 %>%
  ggplot(aes(x=char2, y=rank)) +
    geom_jitter(aes(color=Adoption), width=0.3, alpha=0.3) +
    geom_boxplot(aes(x=char2, y=rank,
                     group=interaction(char, Adoption)),
                 width=0.35, alpha=0.1) +
    scale_x_continuous(breaks=unique(df_long2$char2[1:7])+.4,
                       labels=unique(df_long2$char)) +
    labs(
      y = "Rank",
      x = "Characteristic",
    ) +
    guides(colour = guide_legend(override.aes = list(alpha = 1)))

ggsave("fig-1-jitter-plot.pdf")
## Saving 7 x 5 in image

Box Plots

df_long2 <- df_long %>%
  mutate(char2 = 2*as.numeric(as.factor(char))+0.8*as.numeric(as.factor(Adoption)))

df_long2 %>%
  ggplot(aes(x=char2, y=rank)) +
    #geom_jitter(aes(color=Adoption), width=0.3, alpha=0.3) +
    geom_boxplot(aes(x=char2, y=rank,
                     group=interaction(char, Adoption),
                     color=Adoption),
                 width=0.35, alpha=0.1) +
    scale_x_continuous(breaks=unique(df_long2$char2[1:7])+.4,
                       labels=unique(df_long2$char)) +
    labs(
      y = "Rank",
      x = "Characteristic",
    ) +
    guides(colour = guide_legend(override.aes = list(alpha = 1)))

ggsave("fig-1-box-plot.pdf")
## Saving 7 x 5 in image
df_long2 %>%
  ggplot(aes(x=char, y=rank, fill=Adoption)) +
  geom_boxplot() +
  labs(
    y = "Rank",
    x = "Characteristic",
  )

ggsave("fig-1-box-plot.pdf")
## Saving 7 x 5 in image

Nested Bar Chart

df_long_totals <- df_long %>%
  dplyr::select(owner_gender, is_satisfied, time_thinking) %>%
  plyr::ddply(.(owner_gender, time_thinking), summarize, is_satisfied=sum(is_satisfied))


titanic.colors<-list("gray90",c("#0000ff","#7700ee","#aa00cc","#dd00aa"),
c("#ddcc00","#ee9900"),c("pink","lightblue"))

barNest(
  is_satisfied~owner_gender+time_thinking,
  data=df_long_totals,
  col=titanic.colors,
  #showall=TRUE,
  main="Owner satisfaction by owner gender, age, and forethought",
  ylab="Proportion satisfied"
  #FUN=c("propbrk")#,"binciWu","binciWl","valid.n"),
  #shrink=0.15
  #trueval="Yes"
)

Principal Component Analysis

df_pca <- df %>%
  dplyr::select(contains("rank") & !contains("revised"))
pc <- prcomp(df_pca, center = TRUE, scale = TRUE)
attributes(pc)
## $names
## [1] "sdev"     "rotation" "center"   "scale"    "x"       
## 
## $class
## [1] "prcomp"
print(pc)
## Standard deviations (1, .., p=7):
## [1] 1.3320830 1.1654028 1.0729902 1.0003757 0.9554394 0.8414860 0.3071946
## 
## Rotation (n x k) = (7 x 7):
##                           PC1         PC2         PC3          PC4         PC5
## age_rank            0.2763080 -0.18350266  0.50546070 -0.633087619  0.30842543
## appearance_rank     0.4510782 -0.07276318 -0.37139463  0.436724495  0.46081320
## breed_rank          0.2979285  0.69415885  0.07858535 -0.005516831 -0.18557153
## compatability_rank -0.4407692 -0.16162426  0.54026406  0.506224362 -0.06710822
## personality_rank   -0.4440301 -0.16790994 -0.34920570 -0.216328331  0.47665141
## size_rank           0.2257576 -0.56457471 -0.26666500 -0.113386110 -0.62307390
## trainability_rank  -0.4347577  0.32575304 -0.33979371 -0.304177390 -0.19536375
##                            PC6        PC7
## age_rank            0.19622592 -0.3163216
## appearance_rank     0.32449813 -0.3806887
## breed_rank         -0.42870529 -0.4527151
## compatability_rank  0.05345027 -0.4733828
## personality_rank   -0.54379407 -0.2880839
## size_rank          -0.14332111 -0.3708912
## trainability_rank   0.59438899 -0.3246872
stats::biplot(pc, scale = 0)

# Explained variance.
round(pc$sdev^2 / sum(pc$sdev^2), 2)
## [1] 0.25 0.19 0.16 0.14 0.13 0.10 0.01
g <- ggbiplot(pc,
              obs.scale = 1,
              var.scale = 1,
              groups = df$is_satisfied,
              ellipse = TRUE,
              circle = TRUE,
              ellipse.prob = 0.68)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal',
               legend.position = 'top')
print(g)

pc <- PCAtools::pca(df_pca)
PCAtools::screeplot(pc)

PCAtools::biplot(pc, showLoadings = T)

PCAtools::plotloadings(pc, labSize = 3)
## -- variables retained:
## 27, 81, 87, 150, 153, 169, 173, 194, 207, 249, 253, 317, 357, 368, 395, 419, 435, 454, 518, 529, 559, 570, 598, 609, 675, 760, 787, 851, 894, 1019, 1028, 1034, 1085, 1116, 1173, 1227, 1230, 1233, 1276, 1304, 1315, 1410, 1523, 1525, 23, 69, 78, 304, 306, 307, 308, 309, 310, 316, 319, 471, 552, 934, 974, 975, 981, 982, 1006, 1015, 1107, 1110, 1158, 1180, 1290, 1357, 1419, 165, 198, 205, 483, 535, 560, 705, 824, 843, 864, 1178, 1255, 1257, 1515, 25, 1372, 555, 850, 21, 720, 1104, 1404, 74, 556, 733, 859, 1204, 84, 256, 779, 1065, 1105, 1141, 1226, 1261, 1306, 1334, 144, 283, 572, 677, 699, 754, 805, 1441, 248, 619, 993
## Warning: ggrepel: 579 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

pairsplot(
  pc,
  triangle = TRUE,
  trianglelabSize = 12,
  hline = 0,
  vline = 0,
  pointSize = 1
)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Factor Analysis

df_rank <- df[,c(13, 18:19)]
fa(r=cor(df_rank), nfactors=dim(df_rank)[2], rotate="varimax", SMC=FALSE, fm="pa")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Factor Analysis using method =  pa
## Call: fa(r = cor(df_rank), nfactors = dim(df_rank)[2], rotate = "varimax", 
##     SMC = FALSE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   PA2   PA1   PA3 h2      u2 com
## owner_age        1.00  0.04  0.04  1 3.3e-16   1
## personality_rank 0.04  1.00 -0.06  1 5.6e-16   1
## size_rank        0.04 -0.06  1.00  1 3.3e-16   1
## 
##                        PA2  PA1  PA3
## SS loadings           1.00 1.00 1.00
## Proportion Var        0.33 0.33 0.33
## Cumulative Var        0.33 0.67 1.00
## Proportion Explained  0.33 0.33 0.33
## Cumulative Proportion 0.33 0.67 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  0.03
## The degrees of freedom for the model are -3  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## Fit based upon off diagonal values = 1

Save Session Info

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: aarch64-apple-darwin21.3.0 (64-bit)
## Running under: macOS Monterey 12.4
## 
## Matrix products: default
## BLAS:   /opt/homebrew/Cellar/openblas/0.3.20/lib/libopenblasp-r0.3.20.dylib
## LAPACK: /opt/homebrew/Cellar/r/4.2.0/lib/R/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] introdataviz_0.0.0.9003 ggmosaic_0.3.4          PCAtools_2.5.15        
##  [4] ggrepel_0.9.1           ggbiplot_0.55           scales_1.2.0           
##  [7] plyr_1.8.7              devtools_2.4.3          usethis_2.1.6          
## [10] plotrix_3.8-2           ggridges_0.5.3          vcd_1.4-10             
## [13] caret_6.0-92            lattice_0.20-45         psych_2.2.5            
## [16] janitor_2.1.0           forcats_0.5.1           stringr_1.4.0          
## [19] dplyr_1.0.9             purrr_0.3.4             readr_2.1.2            
## [22] tidyr_1.2.0             tibble_3.1.7            ggplot2_3.3.6          
## [25] tidyverse_1.3.1        
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0              backports_1.4.1          
##   [3] lazyeval_0.2.2            splines_4.2.0            
##   [5] BiocParallel_1.30.3       listenv_0.8.0            
##   [7] digest_0.6.29             foreach_1.5.2            
##   [9] htmltools_0.5.2           fansi_1.0.3              
##  [11] magrittr_2.0.3            memoise_2.0.1            
##  [13] ScaledMatrix_1.4.0        tzdb_0.3.0               
##  [15] remotes_2.4.2             recipes_0.2.0            
##  [17] globals_0.15.0            modelr_0.1.8             
##  [19] gower_1.0.0               matrixStats_0.62.0       
##  [21] hardhat_1.1.0             prettyunits_1.1.1        
##  [23] colorspace_2.0-3          rvest_1.0.2              
##  [25] haven_2.5.0               xfun_0.30                
##  [27] callr_3.7.0               crayon_1.5.1             
##  [29] jsonlite_1.8.0            survival_3.3-1           
##  [31] zoo_1.8-10                iterators_1.0.14         
##  [33] glue_1.6.2                gtable_0.3.0             
##  [35] ipred_0.9-12              DelayedArray_0.22.0      
##  [37] pkgbuild_1.3.1            BiocSingular_1.12.0      
##  [39] future.apply_1.9.0        BiocGenerics_0.42.0      
##  [41] DBI_1.1.3                 Rcpp_1.0.8.3             
##  [43] viridisLite_0.4.0         dqrng_0.3.0              
##  [45] rsvd_1.0.5                stats4_4.2.0             
##  [47] lava_1.6.10               prodlim_2019.11.13       
##  [49] htmlwidgets_1.5.4         httr_1.4.3               
##  [51] ellipsis_0.3.2            farver_2.1.1             
##  [53] pkgconfig_2.0.3           nnet_7.3-17              
##  [55] sass_0.4.1                dbplyr_2.2.0             
##  [57] utf8_1.2.2                labeling_0.4.2           
##  [59] tidyselect_1.1.2          rlang_1.0.3              
##  [61] reshape2_1.4.4            munsell_0.5.0            
##  [63] cellranger_1.1.0          tools_4.2.0              
##  [65] cachem_1.0.6              cli_3.3.0                
##  [67] generics_0.1.2            broom_0.8.0              
##  [69] evaluate_0.15             fastmap_1.1.0            
##  [71] yaml_2.3.5                ModelMetrics_1.2.2.2     
##  [73] processx_3.5.3            knitr_1.39               
##  [75] fs_1.5.2                  sparseMatrixStats_1.8.0  
##  [77] future_1.25.0             nlme_3.1-157             
##  [79] xml2_1.3.3                brio_1.1.3               
##  [81] compiler_4.2.0            rstudioapi_0.13          
##  [83] plotly_4.10.0             curl_4.3.2               
##  [85] testthat_3.1.4            reprex_2.0.1             
##  [87] bslib_0.3.1               stringi_1.7.6            
##  [89] highr_0.9                 ps_1.7.0                 
##  [91] desc_1.4.1                Matrix_1.4-1             
##  [93] vctrs_0.4.1               pillar_1.7.0             
##  [95] lifecycle_1.0.1           lmtest_0.9-40            
##  [97] jquerylib_0.1.4           irlba_2.3.5              
##  [99] data.table_1.14.2         cowplot_1.1.1            
## [101] R6_2.5.1                  IRanges_2.30.0           
## [103] parallelly_1.31.1         sessioninfo_1.2.2        
## [105] codetools_0.2-18          MASS_7.3-57              
## [107] assertthat_0.2.1          pkgload_1.2.4            
## [109] rprojroot_2.0.3           withr_2.5.0              
## [111] mnormt_2.1.0              S4Vectors_0.34.0         
## [113] parallel_4.2.0            hms_1.1.1                
## [115] beachmat_2.12.0           rpart_4.1.16             
## [117] timeDate_3043.102         class_7.3-20             
## [119] DelayedMatrixStats_1.18.0 rmarkdown_2.14           
## [121] snakecase_0.11.0          MatrixGenerics_1.8.1     
## [123] pROC_1.18.0               lubridate_1.8.0

Cleanup

rm(list = ls())